{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "fb7c46c9",
   "metadata": {},
   "outputs": [],
   "source": [
    "from elasticsearch import Elasticsearch, helpers\n",
    "from urllib.request import urlopen\n",
    "import getpass\n",
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "4bbb23fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "ELASTCSEARCH_CERT_PATH = \"/Users/liuxg/elastic/elasticsearch-8.10.0/config/certs/http_ca.crt\"\n",
    " \n",
    "client = Elasticsearch(  ['https://localhost:9200'],\n",
    "    basic_auth = ('elastic', 'vXDWYtL*my3vnKY9zCfL'),\n",
    "    ca_certs = ELASTCSEARCH_CERT_PATH,\n",
    "    verify_certs = True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9f054323",
   "metadata": {},
   "source": [
    "# Confirm that the client has connected with this test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "ec9eccb5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'name': 'liuxgm.local', 'cluster_name': 'elasticsearch', 'cluster_uuid': 'lM5ZgEQNTkO8phtT5qZAHQ', 'version': {'number': '8.10.0', 'build_flavor': 'default', 'build_type': 'tar', 'build_hash': 'e338da74c79465dfdc204971e600342b0aa87b6b', 'build_date': '2023-09-07T08:16:21.960703010Z', 'build_snapshot': False, 'lucene_version': '9.7.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}\n"
     ]
    }
   ],
   "source": [
    "print(client.info())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "607a93b0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ObjectApiResponse({'acknowledged': True})"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "client.ingest.put_pipeline(\n",
    "    id=\"elser-ingest-pipeline\", \n",
    "    description=\"Ingest pipeline for ELSER\",\n",
    "    processors=[\n",
    "    {\n",
    "      \"inference\": {\n",
    "        \"model_id\": \".elser_model_1\",\n",
    "        \"target_field\": \"ml\",\n",
    "        \"field_map\": {\n",
    "          \"plot\": \"text_field\"\n",
    "        },\n",
    "        \"inference_config\": {\n",
    "          \"text_expansion\": {\n",
    "            \"results_field\": \"tokens\"\n",
    "          }\n",
    "        }\n",
    "      }\n",
    "    }\n",
    "  ]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "c137c450",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The index has already existed, going to remove it\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'elser-example-movies'})"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "INDEX_NAME = \"elser-example-movies\"\n",
    "\n",
    "if(client.indices.exists(index=INDEX_NAME)):\n",
    "    print(\"The index has already existed, going to remove it\")\n",
    "    client.options(ignore_status=404).indices.delete(index=INDEX_NAME)\n",
    "\n",
    " \n",
    "client.indices.create(\n",
    "  index=INDEX_NAME,\n",
    "  settings={\n",
    "      \"index\": {\n",
    "          \"number_of_shards\": 1,\n",
    "          \"number_of_replicas\": 1,\n",
    "          \"default_pipeline\": \"elser-ingest-pipeline\"\n",
    "      }\n",
    "  },\n",
    "  mappings={\n",
    "    \"properties\": {\n",
    "      \"plot\": {\n",
    "        \"type\": \"text\",\n",
    "        \"fields\": {\n",
    "          \"keyword\": {\n",
    "            \"type\": \"keyword\",\n",
    "            \"ignore_above\": 256\n",
    "          }\n",
    "        }\n",
    "      },\n",
    "      \"ml.tokens\": {\n",
    "        \"type\": \"rank_features\"\n",
    "      },\n",
    "    }\n",
    "  }\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "a7b40798",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'title': 'Pulp Fiction', 'runtime': '154', 'plot': 'The lives of two mob hitmen, a boxer, a gangster and his wife, and a pair of diner bandits intertwine in four tales of violence and redemption.', 'keyScene': \"John Travolta is forced to inject adrenaline directly into Uma Thurman's heart after she overdoses on heroin.\", 'genre': 'Crime, Drama', 'released': '1994'}, {'title': 'The Dark Knight', 'runtime': '152', 'plot': 'When the menace known as the Joker wreaks havoc and chaos on the people of Gotham, Batman must accept one of the greatest psychological and physical tests of his ability to fight injustice.', 'keyScene': \"Batman angrily responds 'I’m Batman' when asked who he is by Falcone.\", 'genre': 'Action, Crime, Drama, Thriller', 'released': '2008'}, {'title': 'Fight Club', 'runtime': '139', 'plot': 'An insomniac office worker and a devil-may-care soapmaker form an underground fight club that evolves into something much, much more.', 'keyScene': 'Brad Pitt explains the rules of Fight Club to Edward Norton. The first rule of Fight Club is: You do not talk about Fight Club. The second rule of Fight Club is: You do not talk about Fight Club.', 'genre': 'Drama', 'released': '1999'}, {'title': 'Inception', 'runtime': '148', 'plot': 'A thief who steals corporate secrets through the use of dream-sharing technology is given the inverse task of planting an idea into thed of a C.E.O.', 'keyScene': \"Leonardo DiCaprio explains the concept of inception to Ellen Page by using a child's spinning top.\", 'genre': 'Action, Adventure, Sci-Fi, Thriller', 'released': '2010'}, {'title': 'The Matrix', 'runtime': '136', 'plot': 'A computer hacker learns from mysterious rebels about the true nature of his reality and his role in the war against its controllers.', 'keyScene': 'Red pill or blue pill? Morpheus offers Neo a choice between the red pill, which will allow him to learn the truth about the Matrix, or the blue pill, which will return him to his former life.', 'genre': 'Action, Sci-Fi', 'released': '1999'}, {'title': 'The Shawshank Redemption', 'runtime': '142', 'plot': 'Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency.', 'keyScene': 'Andy Dufresne escapes from Shawshank prison by crawling through a sewer pipe.', 'genre': 'Drama', 'released': '1994'}, {'title': 'Goodfellas', 'runtime': '146', 'plot': 'The story of Henry Hill and his life in the mob, covering his relationship with his wife Karen Hill and his mob partners Jimmy Conway and Tommy DeVito in the Italian-American crime syndicate.', 'keyScene': \"Joe Pesci's character Tommy DeVito shoots young Spider in the foot for not getting him a drink.\", 'genre': 'Biography, Crime, Drama', 'released': '1990'}, {'title': 'Se7en', 'runtime': '127', 'plot': 'Two detectives, a rookie and a veteran, hunt a serial killer who uses the seven deadly sins as his motives.', 'keyScene': \"Brad Pitt's character David Mills shoots John Doe after he reveals that he murdered Mills' wife.\", 'genre': 'Crime, Drama, Mystery, Thriller', 'released': '1995'}, {'title': 'The Silence of the Lambs', 'runtime': '118', 'plot': 'A young F.B.I. cadet must receive the help of an incarcerated and manipulative cannibal killer to help catch another serial killer, a madman who skins his victims.', 'keyScene': \"Hannibal Lecter explains to Clarice Starling that he ate a census taker's liver with some fava beans and a nice Chianti.\", 'genre': 'Crime, Drama, Thriller', 'released': '1991'}, {'title': 'The Godfather', 'runtime': '175', 'plot': \"An organized crime dynasty's aging patriarch transfers control of his clandestine empire to his reluctant son.\", 'keyScene': \"James Caan's character Sonny Corleone is shot to death at a toll booth by a number of machine gun toting enemies.\", 'genre': 'Crime, Drama', 'released': '1972'}, {'title': 'The Departed', 'runtime': '151', 'plot': 'An undercover cop and a mole in the police attempt to identify each other while infiltrating an Irish gang in South Boston.', 'keyScene': \"Leonardo DiCaprio's character Billy Costigan is shot to death by Matt Damon's character Colin Sullivan.\", 'genre': 'Crime, Drama, Thriller', 'released': '2006'}, {'title': 'The Usual Suspects', 'runtime': '106', 'plot': 'A sole survivor tells of the twisty events leading up to a horrific gun battle on a boat, which began when five criminals met at a seemingly random police lineup.', 'keyScene': \"Kevin Spacey's character Verbal Kint is revealed to be the mastermind behind the crime, when his limp disappears as he walks away from the police station.\", 'genre': 'Crime, Mystery, Thriller', 'released': '1995'}]\n",
      "Done indexing documents into `search-movies` index!\n"
     ]
    }
   ],
   "source": [
    "# Load data into a JSON object\n",
    "with open('data.json') as f:\n",
    "   data_json = json.load(f)\n",
    "\n",
    "print(data_json)\n",
    "\n",
    "# Prepare the documents to be indexed\n",
    "documents = []\n",
    "for doc in data_json:\n",
    "    documents.append({\n",
    "        \"_index\": INDEX_NAME,\n",
    "        \"_source\": doc,\n",
    "    })\n",
    "\n",
    "# Use helpers.bulk to index\n",
    "helpers.bulk(client, documents)\n",
    "\n",
    "print(\"Done indexing documents into `search-movies` index!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "8d7eef8c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Score: 0.6608096\n",
      "Title: Fight Club\n",
      "Plot: An insomniac office worker and a devil-may-care soapmaker form an underground fight club that evolves into something much, much more.\n",
      "\n",
      "Score: 0.3346656\n",
      "Title: The Usual Suspects\n",
      "Plot: A sole survivor tells of the twisty events leading up to a horrific gun battle on a boat, which began when five criminals met at a seemingly random police lineup.\n",
      "\n",
      "Score: 0.21901892\n",
      "Title: The Shawshank Redemption\n",
      "Plot: Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "response = client.search(\n",
    "    index='elser-example-movies', \n",
    "    size=3,\n",
    "    query={\n",
    "        \"text_expansion\": {\n",
    "            \"ml.tokens\": {\n",
    "                \"model_id\":\".elser_model_1\",\n",
    "                \"model_text\":\"child toy\"\n",
    "            }\n",
    "        }\n",
    "    }\n",
    ")\n",
    "\n",
    "for hit in response['hits']['hits']:\n",
    "    doc_id = hit['_id']\n",
    "    score = hit['_score']\n",
    "    title = hit['_source']['title']\n",
    "    plot = hit['_source']['plot']\n",
    "    print(f\"Score: {score}\\nTitle: {title}\\nPlot: {plot}\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6e465b6",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
